/*
  Copyright (c) 2012-2023, Intel Corporation

  SPDX-License-Identifier: BSD-3-Clause
*/

/*===========================================================================*\
|* Includes
\*===========================================================================*/
#include "algorithm.h"
#include "debug.h"
#include "stdio.h"

/*===========================================================================*\
|* GMRES
\*===========================================================================*/
/* upper_triangular_right_solve:
 * ----------------------------
 * Given upper triangular matrix R and rhs vector b, solve for
 * x.  This "solve" ignores the rows, columns of R that are greater than the
 * dimensions of x.
 */
void upper_triangular_right_solve(const DenseMatrix &R, const Vector &b, Vector &x) {
    // Dimensionality check
    ASSERT(R.rows() >= b.size());
    ASSERT(R.cols() >= x.size());
    ASSERT(b.size() >= x.size());

    int max_row = x.size() - 1;

    // first solve step:
    x[max_row] = b[max_row] / R(max_row, max_row);

    for (int row = max_row - 1; row >= 0; row--) {
        double xi = b[row];
        for (int col = max_row; col > row; col--)
            xi -= x[col] * R(row, col);
        x[row] = xi / R(row, row);
    }
}

/* create_rotation (used in gmres):
 * -------------------------------
 * Construct a Givens rotation to zero out the lowest non-zero entry in a partially
 * factored Hessenburg matrix.  Note that the previous Givens rotations should be
 * applied to this column before creating a new rotation.
 */
void create_rotation(const DenseMatrix &H, size_t col, Vector &Cn, Vector &Sn) {
    double a = H(col, col);
    double b = H(col + 1, col);
    double r;

    if (b == 0) {
        Cn[col] = copysign(1, a);
        Sn[col] = 0;
    } else if (a == 0) {
        Cn[col] = 0;
        Sn[col] = copysign(1, b);
    } else {
        r = sqrt(a * a + b * b);
        Sn[col] = -b / r;
        Cn[col] = a / r;
    }
}

/* Applies the 'col'th Givens rotation stored in vectors Sn and Cn to the 'col'th
 * column of the DenseMatrix M.  (Previous columns don't need the rotation applied b/c
 * presumeably, the first col-1 columns are already upper triangular, and so their
 * entries in the col and col+1 rows are 0.)
 */
void apply_rotation(DenseMatrix &H, size_t col, Vector &Cn, Vector &Sn) {
    double c = Cn[col];
    double s = Sn[col];
    double tmp = c * H(col, col) - s * H(col + 1, col);
    H(col + 1, col) = s * H(col, col) + c * H(col + 1, col);
    H(col, col) = tmp;
}

/* Applies the 'col'th Givens rotation to the vector.
 */
void apply_rotation(Vector &v, size_t col, Vector &Cn, Vector &Sn) {
    double a = v[col];
    double b = v[col + 1];

    double c = Cn[col];
    double s = Sn[col];

    v[col] = c * a - s * b;
    v[col + 1] = s * a + c * b;
}

/* Applies the first 'col' Givens rotations to the newly-created column
 * of H.  (Leaves other columns alone.)
 */
void update_column(DenseMatrix &H, size_t col, Vector &Cn, Vector &Sn) {
    for (size_t i = 0; i < col; i++) {
        double c = Cn[i];
        double s = Sn[i];
        double t = c * H(i, col) - s * H(i + 1, col);
        H(i + 1, col) = s * H(i, col) + c * H(i + 1, col);
        H(i, col) = t;
    }
}

/* After a new column has been added to the hessenburg matrix, factor it back into
 * an upper-triangular matrix by:
 * - applying the previous Givens rotations to the new column
 * - computing the new Givens rotation to make the column upper triangluar
 * - applying the new Givens rotation to the column, and
 * - applying the new Givens rotation to the solution vector
 */
void update_qr_decomp(DenseMatrix &H, Vector &s, size_t col, Vector &Cn, Vector &Sn) {
    update_column(H, col, Cn, Sn);
    create_rotation(H, col, Cn, Sn);
    apply_rotation(H, col, Cn, Sn);
    apply_rotation(s, col, Cn, Sn);
}

void gmres(const Matrix &A, const Vector &b, Vector &x, int num_iters, double max_err) {
    DEBUG_PRINT("gmres starting!\n");
    x.zero();

    ASSERT(A.rows() == A.cols());
    DenseMatrix Qstar(num_iters + 1, A.rows());
    DenseMatrix H(num_iters + 1, num_iters);

    // arrays for storing parameters of givens rotations
    Vector Sn(num_iters);
    Vector Cn(num_iters);

    // array for storing the rhs projected onto the hessenburg's column space
    Vector G(num_iters + 1);
    G.zero();

    double beta = b.norm();
    G[0] = beta;

    // temp vector, stores Aqi
    Vector w(A.rows());

    w.copy(b);
    w.normalize();
    Qstar.set_row(0, w);

    int iter = 0;
    Vector temp(A.rows(), false);
    double rel_err;

    while (iter < num_iters) {
        // w = Aqi
        Qstar.row(iter, temp);
        A.multiply(temp, w);

        // construct ith column of H, i+1th row of Qstar:
        for (int row = 0; row <= iter; row++) {
            Qstar.row(row, temp);
            H(row, iter) = temp.dot(w);
            w.add_ax(-H(row, iter), temp);
        }

        H(iter + 1, iter) = w.norm();
        w.divide(H(iter + 1, iter));
        Qstar.set_row(iter + 1, w);

        update_qr_decomp(H, G, iter, Cn, Sn);

        rel_err = fabs(G[iter + 1] / beta);

        if (rel_err < max_err)
            break;

        if (iter % 100 == 0)
            DEBUG_PRINT("Iter %d: %f err\n", iter, rel_err);

        iter++;
    }

    if (iter == num_iters) {
        fprintf(stderr, "Error: gmres failed to converge in %d iterations (relative err: %f)\n", num_iters, rel_err);
        exit(-1);
    }

    // We've reached an acceptable solution (?):

    DEBUG_PRINT("gmres completed in %d iterations (rel. resid. %f, max %f)\n", iter, rel_err, max_err);
    Vector y(iter + 1);
    upper_triangular_right_solve(H, G, y);
    for (int i = 0; i < iter + 1; i++) {
        Qstar.row(i, temp);
        x.add_ax(y[i], temp);
    }
}
